#!/bin/sh
#
#
# OpenStack Cinder Volume (cinder-volume)
#
# Description:  Manages an OpenStack Volumes (cinder-volume) process as an HA resource
#
# Authors:      Sébastien Han
# Mainly inspired by the Glance API resource agent written by Martin Gerhard Loschwitz from Hastexo: http://goo.gl/whLpr
#
# Support:      openstack@lists.launchpad.net
# License:      Apache Software License (ASL) 2.0
#
# (c) 2012      hastexo Professional Services GmbH
#
# Copyright (c) 2014 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#
# See usage() function below for more details ...
#
# OCF instance parameters:
#   OCF_RESKEY_binary
#   OCF_RESKEY_config
#   OCF_RESKEY_user
#   OCF_RESKEY_pid
#   OCF_RESKEY_amqp_server_port
#   OCF_RESKEY_multibackend
#   OCF_RESKEY_additional_parameters
#######################################################################
# Initialization:

# Locate and load the OCF shell function library.  OCF_FUNCTIONS_DIR is only
# defaulted when the caller has not set it at all.  The expansions are quoted
# so that a directory containing whitespace or glob characters is used as-is.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
# Platform configuration.  NOTE(review): presumably the source of
# VOLATILE_PATH, which this script reads but never assigns -- confirm.
. /usr/bin/tsconfig

#######################################################################

# Fill in some defaults if no values are specified

# Built-in default for every instance parameter.  The *_default variables are
# also interpolated into the meta-data XML.
OCF_RESKEY_binary_default="cinder-volume"
OCF_RESKEY_config_default="/etc/cinder/cinder.conf"
OCF_RESKEY_user_default="cinder"
OCF_RESKEY_pid_default="$HA_RSCTMP/$OCF_RESOURCE_INSTANCE.pid"
OCF_RESKEY_amqp_server_port_default="5672"
OCF_RESKEY_multibackend_default="false"

# Apply a default only when the parameter is unset (a parameter that is set
# but empty is left alone).  The expansions are quoted so the no-op ':' never
# word-splits or glob-expands a configured value.
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"
: "${OCF_RESKEY_config=${OCF_RESKEY_config_default}}"
: "${OCF_RESKEY_user=${OCF_RESKEY_user_default}}"
: "${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}"
: "${OCF_RESKEY_amqp_server_port=${OCF_RESKEY_amqp_server_port_default}}"
: "${OCF_RESKEY_multibackend=${OCF_RESKEY_multibackend_default}}"

#######################################################################

#######################################################################

#
# The following file is used to determine whether Cinder-Volume should be
# failed if the AMQP check does not pass.  Cinder-Volume initializes
# its backend before connecting to Rabbit.  In Ceph configurations,
# Cinder-Volume will not connect to Rabbit until the storage blades
# are provisioned (this can take a long time, so there is no need to
# restart the process over and over again).
#
# The marker is created by the monitor action once an AMQP check has passed
# and is removed again by the start and stop actions.
VOLUME_FAIL_ON_AMQP_CHECK_FILE="$HA_RSCTMP/$OCF_RESOURCE_INSTANCE.fail_on_amqp_check"

#######################################################################


# Print the command synopsis and a one-line summary of every supported
# action to stdout.  Takes no arguments.
usage() {
    local self="$0"

    cat <<UEND
        usage: ${self} (start|stop|validate-all|meta-data|status|monitor)

        ${self} manages an OpenStack Cinder Volume (cinder-volume) process as an HA resource

        The 'start' operation starts the volume service.
        The 'stop' operation stops the volume service.
        The 'validate-all' operation reports whether the parameters are valid
        The 'meta-data' operation reports this RA's meta-data information
        The 'status' operation reports whether the volume service is running
        The 'monitor' operation reports whether the volume service seems to be working

UEND
}

# Print the resource agent meta-data (ra-api-1 XML) to stdout.
# The parameter defaults are interpolated from the OCF_RESKEY_*_default
# variables, so the here-document delimiter is deliberately left unquoted.
# No line of the emitted document carries trailing whitespace.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="cinder-volume">
<version>1.0</version>

<longdesc lang="en">
Resource agent for the OpenStack Cinder Volume (cinder-volume)
May manage a cinder-volume instance or a clone set that
creates a distributed cinder-volume cluster.
</longdesc>
<shortdesc lang="en">Manages the OpenStack Cinder Volume (cinder-volume)</shortdesc>
<parameters>

<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack Cinder Volume server binary (cinder-volume)
</longdesc>
<shortdesc lang="en">OpenStack Cinder Volume server binary (cinder-volume)</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>

<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack Cinder Volume (cinder-volume) configuration file
</longdesc>
<shortdesc lang="en">OpenStack Cinder Volume (cinder-volume) config file</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>

<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running OpenStack Cinder Volume (cinder-volume)
</longdesc>
<shortdesc lang="en">OpenStack Cinder Volume (cinder-volume) user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>

<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pid file to use for this OpenStack Cinder Volume (cinder-volume) instance
</longdesc>
<shortdesc lang="en">OpenStack Cinder Volume (cinder-volume) pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}" />
</parameter>

<parameter name="amqp_server_port" unique="0" required="0">
<longdesc lang="en">
The listening port number of the AMQP server. Mandatory to perform a monitor check
</longdesc>
<shortdesc lang="en">AMQP listening port</shortdesc>
<content type="integer" default="${OCF_RESKEY_amqp_server_port_default}" />
</parameter>

<parameter name="multibackend" unique="0" required="0">
<longdesc lang="en">
If the multi-backend is enabled, the monitor check is slightly different since cinder-volume spawns one thread for each backend.
</longdesc>
<shortdesc lang="en">Multi Backend usage</shortdesc>
<content type="boolean" default="${OCF_RESKEY_multibackend_default}" />
</parameter>

<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters to pass on to the OpenStack Cinder Volume (cinder-volume)
</longdesc>
<shortdesc lang="en">Additional parameters for cinder-volume</shortdesc>
<content type="string" />
</parameter>

</parameters>

<actions>
<action name="start" timeout="10" />
<action name="stop" timeout="10" />
<action name="status" timeout="10" />
<action name="monitor" timeout="5" interval="10" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}

#######################################################################
# Functions invoked by resource manager actions

# Validate the environment this agent needs before any action runs.
#
# Checks, in order: the cinder-volume binary and netstat (via check_binary,
# whose failure handling lives in the OCF library), the configuration file,
# and the service user.
#
# Returns:
#   $OCF_SUCCESS        everything needed is present
#   $OCF_ERR_INSTALLED  the config file is missing outside of a probe, or the
#                       configured user does not exist
cinder_volume_validate() {
    local rc

    check_binary "$OCF_RESKEY_binary"
    check_binary netstat

    # A config file on shared storage that is not available
    # during probes is OK.  The path is quoted so that an empty or
    # whitespace-containing value is tested as a single file name.
    if [ ! -f "$OCF_RESKEY_config" ]; then
        if ! ocf_is_probe; then
            ocf_log err "Config $OCF_RESKEY_config doesn't exist"
            return $OCF_ERR_INSTALLED
        fi
        # Severity is the first argument of ocf_log.
        ocf_log warn "Config $OCF_RESKEY_config not available during a probe"
    fi

    # Quoted so that an empty user name is looked up (and rejected) instead
    # of turning the call into a listing of every account.
    getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log err "User $OCF_RESKEY_user doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    return $OCF_SUCCESS
}

# Report whether the process recorded in the pid file is alive.
#
# Reads the pid file named by $OCF_RESKEY_pid and probes the recorded PID
# with signal 0.  A pid file that is unreadable, empty, or does not hold a
# positive integer is treated like a stale one; stale pid files are removed.
#
# Returns:
#   $OCF_SUCCESS      the recorded process exists
#   $OCF_NOT_RUNNING  no pid file, or the pid file is stale/invalid
cinder_volume_status() {
    local pid
    local rest
    local rc

    if [ ! -f "$OCF_RESKEY_pid" ]; then
        ocf_log info "OpenStack Cinder Volume (cinder-volume) is not running"
        return $OCF_NOT_RUNNING
    fi

    # First whitespace-delimited word of the first line; read's own status
    # is irrelevant (it is non-zero when the file lacks a final newline).
    pid=""
    read -r pid rest 2>/dev/null < "$OCF_RESKEY_pid"

    # Only a positive integer may reach kill: "kill -s 0 0" would signal our
    # own process group and always succeed.
    case "$pid" in
        ''|*[!0-9]*) pid="" ;;
    esac
    if [ -z "$pid" ] || [ "$pid" -le 0 ]; then
        ocf_log info "Old PID file found, but OpenStack Cinder Volume (cinder-volume) is not running"
        rm -f "$OCF_RESKEY_pid"
        return $OCF_NOT_RUNNING
    fi

    ocf_run -warn kill -s 0 "$pid"
    rc=$?
    if [ $rc -eq 0 ]; then
        return $OCF_SUCCESS
    fi

    ocf_log info "Old PID file found, but OpenStack Cinder Volume (cinder-volume) is not running"
    rm -f "$OCF_RESKEY_pid"
    return $OCF_NOT_RUNNING
}

# Print the service state derived from ${VOLATILE_PATH}/cinder-volume.down-after.
#
# The file is expected to hold a UTC timestamp in the same
# "YYYY-MM-DD HH:MM:SS.NNNNNNNNN" layout that is generated below; the
# service counts as up while the current time sorts before that timestamp.
#
# Outputs (stdout, one assignment per line):
#   exists=True|False   whether the file is present
#   is_up=True|False    whether the current time is still before the timestamp
# Returns:
#   0  the file exists
#   1  the file does not exist
cinder_volume_get_service_status() {
    local status_file="${VOLATILE_PATH}/cinder-volume.down-after"
    local now
    local down_after

    if [ ! -f "${status_file}" ]; then
        echo "exists=False"
        echo "is_up=False"
        return 1
    fi

    echo "exists=True"
    now=$(date -u +'%Y-%m-%d %H:%M:%S.%N')
    # Quoted so a VOLATILE_PATH containing whitespace is still read.
    down_after=$(cat "${status_file}")
    # Plain string ordering is enough for fixed-layout timestamps.  expr is
    # used so the comparison works in any POSIX shell; the "x" prefix keeps
    # expr from treating a value as an operator, and LC_ALL=C makes the
    # ordering byte-wise.
    if LC_ALL=C expr "x${now}" \< "x${down_after}" >/dev/null; then
        echo "is_up=True"
    else
        echo "is_up=False"
    fi
    return 0
}

# Succeed when netstat lists an ESTABLISHED TCP connection that is owned by
# PID $1 and whose remote port is $OCF_RESKEY_amqp_server_port.
# The three columns are matched together and anchored (":PORT", then the
# state, then "PID/"), so a PID or port that merely appears as a substring
# of another number on the line does not count.
cinder_volume_amqp_connected() {
    netstat -punt \
        | grep -Eqs ":${OCF_RESKEY_amqp_server_port}[[:space:]]+ESTABLISHED[[:space:]]+$1/"
}

# Check that cinder-volume is alive and connected to the AMQP server.
#
# Arguments:
#   $1 - optional; when equal to "check-service-status" the service status
#        reported by cinder_volume_get_service_status is verified as well.
#
# Steps:
#   1. cinder_volume_status must succeed, otherwise its result is returned.
#   2. The process (or, with multibackend enabled, each of its children)
#      must hold a connection to the AMQP port.  While the marker file
#      $VOLUME_FAIL_ON_AMQP_CHECK_FILE does not exist a missing connection is
#      tolerated and the monitor returns success straight away; once it
#      exists a missing connection is reported as not running.
#   3. The marker file is created after the first successful AMQP check.
#   4. Optionally, the service status is polled up to three times, three
#      seconds apart.  If it is still down on the last attempt, USR2 is sent
#      to the process and an error is returned.
#
# Returns:
#   $OCF_SUCCESS      all requested checks passed (or still initializing)
#   $OCF_NOT_RUNNING  process missing, no children yet, or AMQP check failed
#   $OCF_ERR_GENERIC  service status unavailable or down
cinder_volume_monitor() {
    local rc
    local pid
    local child
    local child_pids
    local status
    local exists
    local is_up
    local retries_left
    local retry_interval
    # No "shift": the function is also called without arguments, and shifting
    # an empty argument list is an error in some shells.
    local check_service_status="${1:-}"

    cinder_volume_status
    rc=$?

    # If status returned anything but success, return that immediately
    if [ $rc -ne $OCF_SUCCESS ]; then
        return $rc
    fi

    # Grab cinder-volume PID
    pid=$(cat "$OCF_RESKEY_pid")

    if ocf_is_true "$OCF_RESKEY_multibackend"; then
        # One ps call provides both the exit status and the child PID list.
        child_pids=$(ps -o pid --no-headers --ppid "$pid")
        rc=$?
        if [ $rc -ne 0 ]; then
            ocf_log err "No child processes from Cinder Volume (yet...): $rc"
            return $OCF_NOT_RUNNING
        fi

        # Every child must be connected; the list is intentionally left
        # unquoted so it splits into one PID per iteration.
        for child in $child_pids; do
            cinder_volume_amqp_connected "$child"
            rc=$?
            if [ $rc -ne 0 ]; then
                if [ -e "$VOLUME_FAIL_ON_AMQP_CHECK_FILE" ]; then
                    ocf_log err "This child process from Cinder Volume is not connected to the AMQP server: $rc"
                    return $OCF_NOT_RUNNING
                else
                    ocf_log info "Cinder Volume initializing, child process is not connected to the AMQP server: $rc"
                    return $OCF_SUCCESS
                fi
            fi
        done
    else
        # Single process: check the connections of the recorded PID itself.
        cinder_volume_amqp_connected "$pid"
        rc=$?
        if [ $rc -ne 0 ]; then
            if [ -e "$VOLUME_FAIL_ON_AMQP_CHECK_FILE" ]; then
                ocf_log err "Cinder Volume is not connected to the AMQP server: $rc"
                return $OCF_NOT_RUNNING
            else
                ocf_log info "Cinder Volume initializing, not connected to the AMQP server: $rc"
                return $OCF_SUCCESS
            fi
        fi
    fi

    # From now on a lost AMQP connection is a failure.
    touch "$VOLUME_FAIL_ON_AMQP_CHECK_FILE" >/dev/null 2>&1

    if [ "$check_service_status" = "check-service-status" ]; then
        retries_left=3
        retry_interval=3
        while [ $retries_left -gt 0 ]; do
            retries_left=$((retries_left - 1))
            status=$(cinder_volume_get_service_status)
            rc=$?
            if [ $rc -ne 0 ]; then
                ocf_log err "Unable to get Cinder Volume status"
                if [ $retries_left -gt 0 ]; then
                    sleep $retry_interval
                    continue
                else
                    return $OCF_ERR_GENERIC
                fi
            fi

            # Pick the two flags out of the helper's "name=value" lines
            # without handing its output to eval.
            exists="False"
            is_up="False"
            case "$status" in *"exists=True"*) exists="True" ;; esac
            case "$status" in *"is_up=True"*) is_up="True" ;; esac

            if [ "$exists" = "True" ] && [ "$is_up" = "False" ]; then
                ocf_log err "Cinder Volume service status is down"
                if [ $retries_left -gt 0 ]; then
                    sleep $retry_interval
                    continue
                else
                    ocf_log info "Trigger Cinder Volume guru meditation report"
                    ocf_run kill -s USR2 "$pid"
                    return $OCF_ERR_GENERIC
                fi
            fi

            break
        done
    fi

    ocf_log debug "OpenStack Cinder Volume (cinder-volume) monitor succeeded"
    return $OCF_SUCCESS
}

# Start cinder-volume as $OCF_RESKEY_user and wait until the monitor passes.
#
# The AMQP marker file is removed first, so a connection that has not been
# established yet is tolerated by the monitor while the service initializes.
# The PID printed by the launching shell is stored in $OCF_RESKEY_pid.
#
# Returns $OCF_SUCCESS once the service is (or already was) running.
# Exits with $OCF_ERR_GENERIC if the monitor reports anything other than
# success or "not running".  There is no internal timeout: the wait loop
# relies on the cluster manager to time the operation out.
cinder_volume_start() {
    local rc
    local cmd

    rm -f "$VOLUME_FAIL_ON_AMQP_CHECK_FILE" >/dev/null 2>&1

    cinder_volume_status
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ]; then
        ocf_log info "OpenStack Cinder Volume (cinder-volume) already running"
        return $OCF_SUCCESS
    fi

    # run the actual cinder-volume daemon. Don't use ocf_run as we're sending the tool's output
    # straight to /dev/null anyway and using ocf_run would break stdout-redirection here.
    # The single-quoted tail is evaluated by the shell that su starts, so $!
    # is the PID of the backgrounded daemon.  The redirect target is quoted
    # so a pid file path containing whitespace is not an ambiguous redirect.
    cmd="${OCF_RESKEY_binary} --config-file=${OCF_RESKEY_config} ${OCF_RESKEY_additional_parameters}"
    su "${OCF_RESKEY_user}" -s /bin/sh -c "${cmd}"' >> /dev/null 2>&1 & echo $!' > "$OCF_RESKEY_pid"

    # Spin waiting for the server to come up.
    # Let the CRM/LRM time us out if required
    while true; do
        # The monitor runs its service-status check only when its first
        # argument is "check-service-status".  An explicit other value keeps
        # it to the process and AMQP checks and always hands it an argument.
        cinder_volume_monitor "skip-service-status"
        rc=$?
        [ $rc -eq $OCF_SUCCESS ] && break
        if [ $rc -ne $OCF_NOT_RUNNING ]; then
            ocf_log err "OpenStack Cinder Volume (cinder-volume) start failed"
            exit $OCF_ERR_GENERIC
        fi
        sleep 1
    done

    ocf_log info "OpenStack Cinder Volume (cinder-volume) started"
    return $OCF_SUCCESS
}

# Last-resort cleanup: SIGKILL every python process that is running the
# cinder-volume binary, and remove the AMQP marker file.
#
# A process is matched when its command line starts with a python interpreter
# (python, python2, python3, ... optionally under /usr/bin) followed by the
# resolved path of $OCF_RESKEY_binary as a complete word.
#
# Always returns 0.
cinder_volume_confirm_stop() {
    local my_binary
    local my_pattern
    local my_processes

    rm -f "$VOLUME_FAIL_ON_AMQP_CHECK_FILE" >/dev/null 2>&1

    my_binary=$(command -v "${OCF_RESKEY_binary}")
    if [ -z "${my_binary}" ]; then
        # With an empty path the pattern below would match almost every
        # python process on the host, so refuse to sweep at all.
        ocf_log warn "Unable to resolve ${OCF_RESKEY_binary}, not looking for leftover processes"
        return 0
    fi

    # The trailing group stops the match at the end of the binary name, so
    # e.g. "<binary>-something" is not hit.  Inside a bracket expression
    # "\w" is not a character class, hence the explicit [:alnum:]_ spelling.
    my_pattern="^(/usr/bin/)?python[0-9.]* ${my_binary}([^[:alnum:]_-]|$)"
    my_processes=$(pgrep -l -f "${my_pattern}")

    if [ -n "${my_processes}" ]; then
        ocf_log info "About to SIGKILL the following: ${my_processes}"
        pkill -KILL -f "${my_pattern}"
    fi

    return 0
}

# Stop cinder-volume.
#
# Sequence:
#   1. Remove the AMQP marker file.
#   2. If the service is not running, sweep leftovers and return success.
#   3. Send SIGTERM to the recorded PID; if that cannot be delivered, sweep
#      leftovers and exit with $OCF_ERR_GENERIC.
#   4. Poll once per second for up to shutdown_timeout seconds: 15 by
#      default, or the operation timeout ($OCF_RESKEY_CRM_meta_timeout, in
#      milliseconds) minus 5 seconds when that is set.
#   5. If the process is still there, SIGKILL its process group, falling back
#      to the PID itself when the group signal cannot be delivered.
#   6. Sweep leftovers, remove the pid file and return $OCF_SUCCESS.
cinder_volume_stop() {
    local rc
    local pid
    local shutdown_timeout
    local count

    rm -f "$VOLUME_FAIL_ON_AMQP_CHECK_FILE" >/dev/null 2>&1

    cinder_volume_status
    rc=$?
    if [ $rc -eq $OCF_NOT_RUNNING ]; then
        ocf_log info "OpenStack Cinder Volume (cinder-volume) already stopped"
        cinder_volume_confirm_stop
        return $OCF_SUCCESS
    fi

    # Try SIGTERM
    pid=$(cat "$OCF_RESKEY_pid")
    ocf_run kill -s TERM "$pid"
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log err "OpenStack Cinder Volume (cinder-volume) couldn't be stopped"
        cinder_volume_confirm_stop
        exit $OCF_ERR_GENERIC
    fi

    # stop waiting
    shutdown_timeout=15
    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
        shutdown_timeout=$(( ($OCF_RESKEY_CRM_meta_timeout / 1000) - 5 ))
    fi
    count=0
    while [ $count -lt $shutdown_timeout ]; do
        cinder_volume_status
        rc=$?
        if [ $rc -eq $OCF_NOT_RUNNING ]; then
            break
        fi
        count=$((count + 1))
        sleep 1
        ocf_log debug "OpenStack Cinder Volume (cinder-volume) still hasn't stopped yet. Waiting ..."
    done

    cinder_volume_status
    rc=$?
    if [ $rc -ne $OCF_NOT_RUNNING ]; then
        # SIGTERM didn't help either, try SIGKILL
        ocf_log info "OpenStack Cinder Volume (cinder-volume) failed to stop after ${shutdown_timeout}s \
          using SIGTERM. Trying SIGKILL ..."
        # "--" keeps the negative (process group) id from being parsed as an
        # option.  The recorded PID is only a valid group id when the daemon
        # leads its own process group, so fall back to the PID itself.
        ocf_run -warn kill -s KILL -- "-$pid" || ocf_run kill -s KILL "$pid"
    fi
    cinder_volume_confirm_stop

    ocf_log info "OpenStack Cinder Volume (cinder-volume) stopped"

    rm -f "$OCF_RESKEY_pid"

    return $OCF_SUCCESS
}

#######################################################################

# Entry point: dispatch on the action name passed as the first argument.
# meta-data and usage/help are answered first and exit immediately, so they
# work even when validation would fail.
case "$1" in
  meta-data)    meta_data
                exit $OCF_SUCCESS;;
  usage|help)   usage
                exit $OCF_SUCCESS;;
esac

# Anything except meta-data and help must pass validation
# (a failing validation ends the script with the validator's status).
cinder_volume_validate || exit $?

# What kind of method was invoked?
# The script's exit status is that of the selected function; validate-all
# has nothing left to do here because validation already ran above.
# Only the monitor action asks for the additional service-status check.
case "$1" in
  start)        cinder_volume_start;;
  stop)         cinder_volume_stop;;
  status)       cinder_volume_status;;
  monitor)      cinder_volume_monitor "check-service-status";;
  validate-all) ;;
  *)            usage
                exit $OCF_ERR_UNIMPLEMENTED;;
esac
